# -*- coding:utf-8 -*-
'''
@author: xiaoming
@contact: lishihui0129@163.com
@time: 2017/6/28 9:55
@desc:
'''

import re
import os
from myUtil import urlPattern,flagSplitPattern,domainSuffix
from mysqlHelper import insertSql,close,connectDB


def urlExtract(string):
    '''
    Pull the URL-looking tail out of a piece of text.

    The text is only considered when the ``domainSuffix`` pattern (imported
    from myUtil) matches somewhere inside it.

    :param string: text to extract from
    :return: list holding at most one item -- the slice of ``string`` that
             starts at the first 'http', or, when 'http' is absent, at the
             first 'www'. The list is empty when ``domainSuffix`` does not
             match or when neither marker occurs.
    '''
    # Guard: without a domainSuffix match there is nothing to extract.
    if re.search(domainSuffix, string) is None:
        return []

    # 'http' takes priority over 'www'; str.find yields -1 when absent.
    for marker in ('http', 'www'):
        start = string.find(marker)
        if start != -1:
            return [string[start:]]

    # Suffix matched, but no recognised marker to anchor the slice on.
    return []

# def prefixFlag(string):
#     '''
#     :param string: 待提取对象
#     :return:前缀信息
#     '''
#     linshi=string.lower()
#     if '网址' in linshi:
#         ind=linshi.index('网址')
#     elif '网站' in linshi:
#         ind=linshi.index('网站')
#     elif '官网' in linshi:
#         ind=linshi.index('官网')
#     elif '链接' in linshi:
#         ind=linshi.index('链接')
#     elif 'web' in linshi:
#         ind=linshi.index('web')
#     else:
#         ind=0
#
#
#     temp=string[:ind]
#     res=re.split(flagSplitPattern,temp)
#     return res[-1].strip()



# def insertUrlTable(string):
#     res=re.findall(urlPattern,string)
#     res=list(set(res))
#     flag=prefixFlag(string)
#     # flag="临时"
#     # temp=[]
#     connectDB()
#     sql="insert into url(url_value,prefix) values(%s,%s)"
#
#     for each in res:
#         param=(each,flag)
#         insertSql(sql,param)
#     close()
